# This file reads in estimated individual wealth HILDA data and saves it as a qs file

# Preliminaries -----------------------------------------------------------

## Start from an empty workspace so objects left over from earlier work cannot
## leak into this script, then run the garbage collector
rm(list = ls())
gc()

## Raise the Windows memory cap (size is in MB). memory.limit() only has an
## effect on Windows builds of R older than 4.2.0; everywhere else it is a stub
## that just warns, so the call is skipped there
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  memory.limit(size = 25000)
}

# Master package loading
source("./R scripts/Master package loading.R", encoding="ISO-8859-1")

## location of indiv wealth data - generated separately in Stata from chapter 2 / appendix B-related code files published on the Commission's website
indiv_wealth_path <- "./individual_weight_vars_restricted.dta"

## where to save indiv wealth data
hilda_qs_path <- "./HILDA Wave 19/qs files/"


## folder holding the HILDA Stata (.dta) files
hilda_dta_path <- "./HILDA Wave 19/2. STATA 190/"

## base names (extension stripped) of the combined dta files in that folder
combined_files <- list.files(hilda_dta_path) %>%
  str_subset("Combined_.+") %>%
  ## escape the dot and anchor to the end so that only a literal trailing
  ## ".dta" is stripped (an unescaped "." would match any character)
  str_remove("\\.dta$")


# Read in data ------------------------------------------------------------

## Load the Stata file of individual-level wealth estimates
## (covers waves 2, 6, 10, 14 and 18)
indiv_wealth <- read_dta(file = indiv_wealth_path)


  
# Add descriptions of wealth variables from the hh-level data -----------------

## Tabulate the current variable label of every column, one row per column:
##   name  - column name in indiv_wealth
##   label - its variable label, or NA when the column has none
indiv_wealth_labs <- vapply(
  indiv_wealth,
  function(x) {
    ## exact = TRUE matters: `attributes(x)$label` partial-matches, so a column
    ## carrying value labels ("labels") but no variable label would wrongly
    ## look labelled
    label <- attr(x, "label", exact = TRUE)
    if (is.null(label)) NA_character_ else as.character(label)
  },
  ## every column must yield exactly one string; anything else is an error
  character(1)
) %>%
  {
    data.frame(name = names(.), label = unname(.), stringsAsFactors = FALSE)
  }


## Individual-level vars that have no label, renamed to their household-level
## counterparts (leading "p" -> "h") so they can be matched against the HILDA
## household variables. primary_residence_share is explicitly excluded
## (presumably because it has no household-level equivalent - confirm)
hh_wealth_vars <- indiv_wealth_labs %>%
  filter(is.na(label), name != "primary_residence_share") %>%
  ## anchor the pattern so only a leading "p" is swapped, never a "p" that
  ## happens to appear further into the name
  mutate(name = str_replace(name, "^p", "h"))


## Load the wave 18 combined file (the 18th entry of combined_files) from the
## qs folder, keeping only the columns whose names match one of the
## household-level variable names (matches() treats each name as a regular
## expression and takes the union of the matches)
hilda_wave18 <- combined_files[18] %>%
  paste0(hilda_qs_path, ., ".qs") %>%
  qread() %>%
  select(matches(hh_wealth_vars$name))

## Build a named list of labels for the individual-level vars: take each
## household-level variable label, reword it for individuals, and key it by
## the "p"-prefixed variable name
hh_wealth_labs <- lapply(
  hilda_wave18,
  function(x) {
    ## exact = TRUE stops partial matching from picking up the value labels
    ## ("labels") attribute when a column has no variable label
    label <- attr(x, "label", exact = TRUE)
    ## nothing to adapt: pass NULL through rather than a zero-length string
    if (is.null(label)) {
      return(NULL)
    }
    label %>%
      ## rename labels
      str_replace("Household", "Individual") %>%
      str_replace(" \\[weighted.*$", "")
  }
) %>%
  ## only a leading "h" is the household prefix, so anchor the swap back to "p"
  setNames(str_replace(names(.), "^h", "p"))


## Attach the reworded labels to the matching columns of the indiv wealth data
var_label(indiv_wealth) <- hh_wealth_labs


## Write the labelled data out in qs format
qsave(
  x = indiv_wealth,
  file = paste0(hilda_qs_path, "indiv_wealth_vars_restricted.qs")
)


